package com.spark.graph
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by Administrator on 2017/7/27.
  *
  * Spark driver that reads a tab-separated text file, keys every row by its
  * trimmed third column, and prints: each row's key, the total number of rows,
  * the size of every key group, and the number of distinct keys.
  */
object groupMain {

  /**
    * Entry point.
    *
    * @param args optional; `args(0)` overrides the input file path. When absent,
    *             the original hard-coded path `F:\data\zjolnews.txt` is used.
    */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse("F:\\data\\zjolnews.txt")

    // setIfMissing keeps "local" as the default master but no longer overrides
    // a master supplied externally (e.g. via spark-submit --master).
    val conf = new SparkConf().setAppName("zjol").setIfMissing("spark.master", "local")
    val sc = new SparkContext(conf)

    try {
      // Rows with fewer than three columns (including blank lines) are skipped;
      // indexing column 2 on them would otherwise fail the whole job with an
      // ArrayIndexOutOfBoundsException.
      // Cached because three separate actions below consume this RDD.
      val initRDD = sc.textFile(inputPath)
        .map(line => line.split("\t"))
        .filter(fields => fields.length > 2)
        .map(fields => (fields(2).trim, fields))
        .cache()

      // NOTE: foreach/println runs on the executors; the output is only visible
      // on the driver console when running with a local master.
      initRDD.foreach { case (key, _) => println(key) }

      println(initRDD.count())

      // Cached so the shuffle is not recomputed for the count that follows.
      val groupRdd = initRDD.groupByKey().cache()

      groupRdd.foreach { case (key, rows) => println(s"$key\t${rows.size}") }
      println(groupRdd.count())
    } finally {
      // Always release the context, even when a job above throws.
      sc.stop()
    }
  }

}
